# if (!require("BiocManager", quietly = TRUE))
#   install.packages("BiocManager")
# 
# BiocManager::install("DESeq2")

library("DESeq2")
library("ggplot2")
library("ggrepel")
library("dplyr")

data1=read.csv("D:/大三下/R语言/R语言作业/mRNA_exprSet.csv")
countData=data1
#数据处理
rownames(countData)=countData[,1]#将第一行设为列名并删除原来的行
countData=countData[,-1]
colData=data.frame(colnames(countData))
#循环设置样本的类型condition
for(i in 1:123){
  if(substring(colData[i,1],14,15)=="01")
    colData$condition[i]="tumor"
  else 
    colData$condition[i]="normal"
}
row.names(colData)=colData$colnames.countData.

#不知道为什么colData的第一列删除不了，所以只能再建一个coldata2
coldata2=data.frame(colData$condition,row.names =colnames(countData))
#差异表达分析
dds=DESeqDataSetFromMatrix(countData = countData, colData = coldata2, design=~colData.condition )
head(dds)
dds2=DESeq(dds)
res <- results(dds2)
#结果显示
summary(res)
#将结果显示为表格，并准备画火山图
res1 <- data.frame(res, stringsAsFactors = FALSE, check.names = FALSE)
#将结果排序
res1 <- res1[order(res1$pvalue, res1$log2FoldChange, decreasing = c(FALSE, TRUE)), ]
#获取差异表达基因,并给他们标标签
res1$threshold[res1$padj<0.05&res1$log2FoldChange>=1]="up"
res1$threshold[res1$padj<0.05&res1$log2FoldChange<=-1]="down"
res1$threshold[!((res1$padj<0.05&res1$log2FoldChange>=1)|(res1$padj<0.05&res1$log2FoldChange <=-1))]="non"
#把行名变为第一列
res1=cbind(ID=row.names(res1), res1)
#绘制火山图
jpeg(file="火山图2.jpg", width = 1800, height = 1800,units = "px")
#选取上调下调差异显著的前十个基因
top_20=bind_rows(
  res1%>%
    filter(threshold=="up")%>%
    arrange(padj,desc(abs(log2FoldChange)))%>%
    head(10),
  res1%>%
    filter(threshold=="down")%>%
    arrange(padj,desc(abs(log2FoldChange)))%>%
    head(10)
)
#绘制火山图
ggplot(data=res1,aes(x=res1$log2FoldChange,y=-log10(res1$padj),colour=threshold))+
  xlab("log2 fold change")+ylab("-log10 padj")+
  geom_point(size=6,alpha=0.6)+
  scale_color_manual(values = c('up'="red",'down'="blue",'non'="grey"))+
  geom_hline(yintercept=-log10(0.05),linetype=3)+
  #给火山图加线
  geom_vline(xintercept=c(-1,1),linetype=3)+
  #改变横纵坐标和图例的文字大小
  theme(axis.text=element_text(size=35),
        axis.title=element_text(size=35,face="bold"),
        legend.text=element_text(size=35),
        legend.title=element_text(size=35))+
 #改变图例的大小
   guides(color=guide_legend(override.aes = list(size=30,alpha=1)))+
  #标记差异最显著的上调下调的各十个基因
  geom_text_repel(data = top_20,
                                                            aes(x=log2FoldChange,y=-log10(padj),label = top_20$ID),
                                                            size = 8,
                                                            box.padding = unit(0.5, "lines"),
                                                            point.padding = unit(0.8, "lines"), segment.color = "black", show.legend = FALSE)



dev.off()
  
                  